This file is used to generate a dataset containing all individual datasets, without melanocytes.
library(dplyr)
library(patchwork)
library(ggplot2)
library(ComplexHeatmap)
.libPaths()
## [1] "/usr/local/lib/R/library"
In this section, we set the global settings of the analysis. We will store data there :
save_name = "takahashi"
out_dir = "."
n_threads = 5 # for UMAP
We load the sample information :
sample_info = readRDS(paste0(out_dir, "/../1_metadata/takahashi_sample_info.rds"))
project_names_oi = sample_info$project_name
graphics::pie(rep(1, nrow(sample_info)),
col = sample_info$color,
labels = sample_info$project_name)
Here are custom colors for each cell type :
color_markers = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_color_markers.rds"))
data.frame(cell_type = names(color_markers),
color = unlist(color_markers)) %>%
ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
ggplot2::geom_point(pch = 21, size = 5) +
ggplot2::scale_fill_manual(values = unlist(color_markers), breaks = names(color_markers)) +
ggplot2::theme_classic() +
ggplot2::theme(legend.position = "none",
axis.line = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank(),
axis.text.x = element_text(angle = 30, hjust = 1))
We load the markers and specific colors for each cell type :
cell_markers = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_cell_markers.rds"))
lengths(cell_markers)
## CD4 T cells CD8 T cells Langerhans cells
## 13 13 9
## macrophages B cells cuticle
## 10 16 15
## cortex medulla IRS
## 16 10 16
## proliferative HF-SCs IFE basal
## 20 17 16
## IFE granular spinous ORS melanocytes
## 17 15 10
## sebocytes
## 8
We load markers to display on the dotplot :
dotplot_markers = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_dotplot_markers.rds"))
dotplot_markers
## $`CD4 T cells`
## [1] "PTPRC" "CD3E" "CD4"
##
## $`CD8 T cells`
## [1] "CD3E" "CD8A"
##
## $`Langerhans cells`
## [1] "CD207" "CPVL"
##
## $macrophages
## [1] "TREM2" "MSR1"
##
## $`B cells`
## [1] "CD79A" "CD79B"
##
## $cuticle
## [1] "MSX2" "KRT32" "KRT35"
##
## $cortex
## [1] "KRT31" "PRR9"
##
## $medulla
## [1] "BAMBI" "ADLH1A3"
##
## $IRS
## [1] "KRT71" "KRT73"
##
## $proliferative
## [1] "TOP2A" "MCM5" "TK1"
##
## $`HF-SCs`
## [1] "KRT14" "CXCL14"
##
## $`IFE basal`
## [1] "COL17A1" "KRT15"
##
## $`IFE granular spinous`
## [1] "SPINK5" "KRT1"
##
## $ORS
## [1] "KRT16" "KRT6C"
##
## $melanocytes
## [1] "DCT" "MLANA"
##
## $sebocytes
## [1] "CLMP" "PPARG"
For each sample, we :
We load individual datasets :
sobj_list = lapply(project_names_oi, FUN = function(one_project_name) {
subsobj = readRDS(paste0(out_dir, "/../2_individual/datasets/",
one_project_name, "_sobj_filtered.rds"))
return(subsobj)
})
names(sobj_list) = project_names_oi
lapply(sobj_list, FUN = dim) %>%
do.call(rbind, .) %>%
rbind(., colSums(.))
## [,1] [,2]
## GSM3717034 10320 71
## GSM3717035 12129 275
## GSM3717036 14170 510
## GSM3717037 32458 4084
## GSM3717038 32458 1094
## 101535 6034
We represent cells in the UMAP :
name2D = "RNA_pca_20_umap"
We look at cell type annotation for each dataset :
plot_list = lapply(sobj_list, FUN = function(one_sobj) {
mytitle = as.character(unique(one_sobj$project_name))
mysubtitle = ncol(one_sobj)
p = Seurat::DimPlot(one_sobj, group.by = "cell_type",
reduction = name2D) +
ggplot2::scale_color_manual(values = color_markers,
breaks = names(color_markers),
name = "Cell Type") +
ggplot2::labs(title = mytitle,
subtitle = paste0(mysubtitle, " cells")) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)) +
Seurat::NoAxes()
return(p)
})
plot_list[[length(plot_list) + 1]] = patchwork::guide_area()
patchwork::wrap_plots(plot_list, ncol = 3) +
patchwork::plot_layout(guides = "collect") &
ggplot2::theme(legend.position = "right")
and clustering :
plot_list = lapply(sobj_list, FUN = function(one_sobj) {
mytitle = as.character(unique(one_sobj$project_name))
mysubtitle = ncol(one_sobj)
p = Seurat::DimPlot(one_sobj, group.by = "seurat_clusters",
reduction = name2D, label = TRUE) +
ggplot2::labs(title = mytitle,
subtitle = paste0(mysubtitle, " cells")) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)) +
Seurat::NoAxes() + Seurat::NoLegend()
return(p)
})
patchwork::wrap_plots(plot_list, ncol = 3)
This dataset gathers 3 samples obtained with Drop-Seq and 2 obtained with 10X. They may not contain the same genes. We compare the gene identifiers between the two 10X datasets :
ggvenn::ggvenn(data = list(GSM3717037 = sobj_list[["GSM3717037"]]@assays[["RNA"]]@meta.features$Ensembl_ID,
GSM3717038 = sobj_list[["GSM3717038"]]@assays[["RNA"]]@meta.features$Ensembl_ID),
stroke_size = 0.5, set_name_size = 4) +
ggplot2::labs(title = "Gene Ensembl IDs between the two 10X datasets") +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"))
The two 10X datasets contain the same gene IDs. What about with the three Drop-Seq datasets ?
Note: With the ggvenn package, this is not possible to
make a Venn diagram with 5 sets.
ggvenn::ggvenn(data = list(GSM3717034 = sobj_list[["GSM3717034"]]@assays[["RNA"]]@meta.features$Ensembl_ID,
GSM3717035 = sobj_list[["GSM3717035"]]@assays[["RNA"]]@meta.features$Ensembl_ID,
GSM3717036 = sobj_list[["GSM3717036"]]@assays[["RNA"]]@meta.features$Ensembl_ID,
GSM3717037 = sobj_list[["GSM3717037"]]@assays[["RNA"]]@meta.features$Ensembl_ID),
stroke_size = 0.5, set_name_size = 4) +
ggplot2::labs(title = "Gene Ensembl IDs between the three Drop-Seq datasets and one 10X") +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"))
There are much less genes detected with the Drop-Seq method. We keep only the genes whose Ensembl IDs are shared between all datasets. When we built individual datasets, we used the same gene annotation. So, genes with the same Ensembl IDs across all datasets have the same names. So, we can work with gene names.
common_genes = Reduce(x = lapply(sobj_list, FUN = rownames),
f = intersect)
length(common_genes)
## [1] 7854
It discards a lot of information regarding 10X datasets. So, we keep all common genes between all datasets + common genes between the two 10X datasets.
common_genes = intersect(rownames(sobj_list$GSM3717037), rownames(sobj_list$GSM3717038)) %>%
c(common_genes, .) %>%
unique()
length(common_genes)
## [1] 32458
We subset all Seurat objects to keep only these genes :
sobj_list = lapply(sobj_list, FUN = function(one_sobj) {
avail_genes = intersect(rownames(one_sobj), common_genes)
one_sobj = one_sobj[avail_genes, ]
return(one_sobj)
})
lapply(sobj_list, FUN = dim) %>%
do.call(rbind, .) %>%
rbind(., colSums(.)) %>%
`colnames<-`(c("Nb genes", "Nb cells"))
## Nb genes Nb cells
## GSM3717034 9118 71
## GSM3717035 10954 275
## GSM3717036 12218 510
## GSM3717037 32458 4084
## GSM3717038 32458 1094
## 97206 6034
For each individual dataset, we remove melanocytes. First, we smooth cell type annotation at a cluster level :
sobj_list = lapply(sobj_list, FUN = function(one_sobj) {
cluster_type = table(one_sobj$cell_type, one_sobj$seurat_clusters) %>%
prop.table(., margin = 2) %>%
apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
levels(one_sobj$cell_type)[cluster_type])
one_sobj$cluster_type = cluster_type[one_sobj$seurat_clusters]
## Output
return(one_sobj)
})
To locate melanocytes, we look at their score, cell type annotation, and clustering.
plot_list = lapply(sobj_list, FUN = function(one_sobj) {
project_name = as.character(unique(one_sobj$project_name))
plot_sublist = list()
# Score
plot_sublist[[1]] = Seurat::FeaturePlot(one_sobj, reduction = name2D,
features = "score_melanocytes") +
ggplot2::labs(title = project_name,
subtitle = "Melanocytes score") +
Seurat::NoAxes() +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(aspect.ratio = 1,
plot.subtitle = element_text(hjust = 0.5))
# Cell type
plot_sublist[[2]] = Seurat::DimPlot(one_sobj,
reduction = name2D,
group.by = "cell_type",
order = "melanocytes") +
ggplot2::scale_color_manual(values = c("purple", rep("gray92", length(color_markers) - 1)),
breaks = c("melanocytes", setdiff(names(color_markers), "melanocytes"))) +
ggplot2::labs(title = "Cell type annotation",
subtitle = paste0(sum(one_sobj$cell_type == "melanocytes"),
" melanocytes")) +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
# Clusters
plot_sublist[[3]] = Seurat::DimPlot(one_sobj,
reduction = name2D,
group.by = "seurat_clusters",
label = TRUE) +
ggplot2::labs(title = "Clusters") +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
# Cluster type
plot_sublist[[4]] = Seurat::DimPlot(one_sobj,
reduction = name2D,
group.by = "cluster_type") +
ggplot2::scale_color_manual(values = c("purple", rep("gray92", length(color_markers) - 1)),
breaks = c("melanocytes", setdiff(names(color_markers), "melanocytes"))) +
ggplot2::labs(title = "Cluster annotation",
subtitle = paste0(sum(one_sobj$cluster_type == "melanocytes"),
" melanocytes")) +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
return(plot_sublist)
}) %>% unlist(., recursive = FALSE)
patchwork::wrap_plots(plot_list, ncol = 4)
We remove melanocytes based on cluster annotation for 10X datasets and based on the cell type annotation for Drop-Seq datasets :
sobj_list = lapply(sobj_list, FUN = function(one_sobj) {
if (one_sobj@project.name %in% c("GSM3717037", "GSM3717038")) {
one_sobj$is_of_interest = (one_sobj$cluster_type != "melanocytes")
} else {
one_sobj$is_of_interest = (one_sobj$cell_type != "melanocytes")
}
if (sum(one_sobj$is_of_interest) > 0) {
one_sobj = subset(one_sobj, is_of_interest == TRUE)
} else {
one_sobj = NA
}
one_sobj$is_of_interest = NULL
return(one_sobj)
})
lapply(sobj_list, FUN = dim) %>%
do.call(rbind, .) %>%
rbind(., colSums(.))
## [,1] [,2]
## GSM3717034 9118 65
## GSM3717035 10954 270
## GSM3717036 12218 497
## GSM3717037 32458 3781
## GSM3717038 32458 1023
## 97206 5636
We remove melanocytes from annotation :
cell_markers = cell_markers[names(cell_markers) != "melanocytes"]
color_markers = color_markers[names(color_markers) != "melanocytes"]
dotplot_markers = dotplot_markers[names(dotplot_markers) != "melanocytes"]
We re-annotate cells for cell type, since melanocytes have been removed :
sobj_list = lapply(sobj_list, FUN = function(one_sobj) {
# Remove old annotation
one_sobj@meta.data[, grep(colnames(one_sobj@meta.data), pattern = "score", value = TRUE)] = NULL
# Re-annot
one_sobj = aquarius::cell_annot_custom(one_sobj,
newname = "cell_type",
markers = cell_markers,
use_negative = TRUE,
add_score = FALSE,
verbose = TRUE)
# Set factor levels
one_sobj$cell_type = factor(one_sobj$cell_type, levels = names(cell_markers))
return(one_sobj)
})
We combine all datasets :
sobj = base::merge(sobj_list[[1]],
y = sobj_list[c(2:length(sobj_list))],
add.cell.ids = names(sobj_list))
sobj
## An object of class Seurat
## 32458 features across 5636 samples within 1 assay
## Active assay: RNA (32458 features, 0 variable features)
We add again the correspondence between gene names and gene ID. We take the correspondence from one individual 10X dataset.
sobj@assays$RNA@meta.features = sobj_list[[5]]@assays$RNA@meta.features[, c("Ensembl_ID", "gene_name")]
head(sobj@assays$RNA@meta.features)
## Ensembl_ID gene_name
## MIR1302-10 ENSG00000259553 MIR1302-10
## FAM138A ENSG00000237613 FAM138A
## OR4F5 ENSG00000186092 OR4F5
## RP11-34P13.7 ENSG00000238009 RP11-34P13.7
## RP11-34P13.8 ENSG00000239945 RP11-34P13.8
## AL627309.1 ENSG00000237683 AL627309.1
We remove the list of objects :
rm(sobj_list)
We keep a subset of meta.data and reset levels :
sobj@meta.data = sobj@meta.data[, c("orig.ident", "nCount_RNA", "nFeature_RNA", "log_nCount_RNA",
"project_name", "sample_identifier", "sample_type",
"laboratory", "location", "Seurat.Phase", "cyclone.Phase",
"percent.mt", "percent.rb", "cell_type")]
sobj$orig.ident = factor(sobj$orig.ident, levels = levels(sample_info$project_name))
sobj$project_name = factor(sobj$project_name, levels = levels(sample_info$project_name))
sobj$sample_identifier = factor(sobj$sample_identifier, levels = levels(sample_info$sample_identifier))
sobj$sample_type = factor(sobj$sample_type, levels = unique(sample_info$sample_type))
sobj$cell_type = factor(sobj$cell_type, levels = names(color_markers))
summary(sobj@meta.data)
## orig.ident nCount_RNA nFeature_RNA log_nCount_RNA
## GSM3717034: 65 Min. : 269 Min. : 206.0 Min. : 5.762
## GSM3717035: 270 1st Qu.: 2338 1st Qu.: 918.8 1st Qu.: 7.772
## GSM3717036: 497 Median : 7776 Median :2229.0 Median : 8.959
## GSM3717037:3781 Mean : 9513 Mean :2108.6 Mean : 8.624
## GSM3717038:1023 3rd Qu.:14035 3rd Qu.:2977.0 3rd Qu.: 9.549
## Max. :48372 Max. :6334.0 Max. :10.787
##
## project_name sample_identifier sample_type laboratory
## GSM3717034: 65 Takahashi_HD_1: 65 HD:5636 Length:5636
## GSM3717035: 270 Takahashi_HD_2: 270 Class :character
## GSM3717036: 497 Takahashi_HD_3: 497 Mode :character
## GSM3717037:3781 Takahashi_HD_4:3781
## GSM3717038:1023 Takahashi_HD_5:1023
##
##
## location Seurat.Phase cyclone.Phase percent.mt
## Length:5636 Length:5636 Length:5636 Min. : 0.000
## Class :character Class :character Class :character 1st Qu.: 1.961
## Mode :character Mode :character Mode :character Median : 3.007
## Mean : 3.668
## 3rd Qu.: 4.196
## Max. :19.934
##
## percent.rb cell_type
## Min. : 1.381 HF-SCs :1335
## 1st Qu.:20.621 IFE granular spinous:1321
## Median :25.107 IFE basal : 839
## Mean :24.076 ORS : 716
## 3rd Qu.:28.864 proliferative : 301
## Max. :48.039 IRS : 236
## (Other) : 888
We remove genes that are expressed in less than 5 cells :
sobj = aquarius::filter_features(sobj, min_cells = 5)
## [1] 32458 5636
## [1] 18588 5636
sobj
## An object of class Seurat
## 18588 features across 5636 samples within 1 assay
## Active assay: RNA (18588 features, 0 variable features)
How many cells by sample ?
table(sobj$project_name)
##
## GSM3717034 GSM3717035 GSM3717036 GSM3717037 GSM3717038
## 65 270 497 3781 1023
We represent this information as a barplot :
aquarius::plot_barplot(df = table(sobj$project_name,
sobj$cell_type) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "Cell Type", "Number")),
x = "Sample", y = "Number", fill = "Cell Type",
position = position_fill()) +
ggplot2::scale_fill_manual(values = unlist(color_markers),
breaks = names(color_markers),
name = "Cell Type")
This is the same barplot with another position :
aquarius::plot_barplot(df = table(sobj$project_name,
sobj$cell_type) %>%
as.data.frame.table() %>%
`colnames<-`(c("Sample", "Cell Type", "Number")),
x = "Sample", y = "Number", fill = "Cell Type",
position = position_stack()) +
ggplot2::scale_fill_manual(values = unlist(color_markers),
breaks = names(color_markers),
name = "Cell Type")
We normalize the count matrix for remaining cells and select highly variable features :
sobj = Seurat::NormalizeData(sobj,
normalization.method = "LogNormalize")
sobj = Seurat::FindVariableFeatures(sobj, nfeatures = 2000)
sobj = Seurat::ScaleData(sobj)
sobj
## An object of class Seurat
## 18588 features across 5636 samples within 1 assay
## Active assay: RNA (18588 features, 2000 variable features)
We perform a PCA :
sobj = Seurat::RunPCA(sobj,
assay = "RNA",
reduction.name = "RNA_pca",
npcs = 100,
seed.use = 1337L)
sobj
## An object of class Seurat
## 18588 features across 5636 samples within 1 assay
## Active assay: RNA (18588 features, 2000 variable features)
## 1 dimensional reduction calculated: RNA_pca
We choose the number of dimensions such that they summarize 60 % of the variability :
stdev = sobj@reductions[["RNA_pca"]]@stdev
stdev_prop = cumsum(stdev)/sum(stdev)
ndims = which(stdev_prop > 0.60)[1]
ndims
## [1] 41
We can visualize this on the elbow plot :
elbow_p = Seurat::ElbowPlot(sobj, ndims = 100, reduction = "RNA_pca") +
ggplot2::geom_point(x = ndims, y = stdev[ndims], col = "red")
x_text = ggplot_build(elbow_p)$layout$panel_params[[1]]$x$get_labels() %>% as.numeric()
elbow_p = elbow_p +
ggplot2::scale_x_continuous(breaks = sort(c(x_text, ndims)), limits = c(0, 100))
x_color = ifelse(ggplot_build(elbow_p)$layout$panel_params[[1]]$x$get_labels() %>%
as.numeric() %>% round(., 2) == round(ndims, 2), "red", "black")
elbow_p = elbow_p +
ggplot2::theme_classic() +
ggplot2::theme(axis.text.x = element_text(color = x_color))
elbow_p
We generate a UMAP and a UMAP with 41 principal components :
sobj = Seurat::RunTSNE(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
num_threads = n_threads, # Rtsne::Rtsne option
check_duplicates = FALSE,
reduction.name = paste0("RNA_pca_", ndims, "_tsne"))
sobj = Seurat::RunUMAP(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_umap"))
(Time to run : 25.22 s)
We can visualize the two representations :
tsne = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("RNA_pca_", ndims, "_umap")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("RNA_pca_", ndims, "_umap")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We remove sample specific effect on the pca using Harmony :
`%||%` = function(lhs, rhs) {
if (!is.null(x = lhs)) {
return(lhs)
} else {
return(rhs)
}
}
set.seed(1337L)
sobj = harmony::RunHarmony(object = sobj,
group.by.vars = "project_name",
plot_convergence = TRUE,
reduction = "RNA_pca",
assay.use = "RNA",
reduction.save = "harmony",
max.iter.harmony = 50,
project.dim = FALSE)
(Time
to run : 19 s)
From this batch-effect removed projection, we generate a UMAP and a UMAP.
sobj = Seurat::RunUMAP(sobj,
seed.use = 1337L,
dims = 1:ndims,
reduction = "harmony",
reduction.name = paste0("harmony_", ndims, "_umap"),
reduction.key = paste0("harmony_", ndims, "umap_"))
sobj = Seurat::RunTSNE(sobj,
dims = 1:ndims,
seed.use = 1337L,
num_threads = n_threads, # Rtsne::Rtsne option
check_duplicates = FALSE,
reduction = "harmony",
reduction.name = paste0("harmony_", ndims, "_tsne"),
reduction.key = paste0("harmony", ndims, "tsne_"))
(Time to run : 25.3 s)
We visualize the corrected projections :
tsne = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("harmony_", ndims, "_umap")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - harmony - UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "project_name",
reduction = paste0("harmony_", ndims, "_umap")) +
ggplot2::scale_color_manual(values = sample_info$color,
breaks = sample_info$project_name) +
Seurat::NoAxes() + ggplot2::ggtitle("PCA - harmony - UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We will keep the tSNE from harmony :
reduction = "harmony"
name2D = paste0("harmony_", ndims, "_tsne")
We generate a clustering :
sobj = Seurat::FindNeighbors(sobj, reduction = reduction, dims = 1:ndims)
sobj = Seurat::FindClusters(sobj, resolution = 1.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5636
## Number of edges: 226335
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8389
## Number of communities: 25
## Elapsed time: 0 seconds
clusters_plot = Seurat::DimPlot(sobj, reduction = name2D, label = TRUE) +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::labs(title = "Clusters ID") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
clusters_plot
We represent the 4 quality metrics :
plot_list = Seurat::FeaturePlot(sobj, reduction = name2D,
combine = FALSE, pt.size = 0.25,
features = c("percent.mt", "percent.rb", "log_nCount_RNA", "nFeature_RNA"))
plot_list = lapply(plot_list, FUN = function(one_plot) {
one_plot +
Seurat::NoAxes() +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(aspect.ratio = 1)
})
patchwork::wrap_plots(plot_list, nrow = 1)
We can represent clusters, split by sample of origin :
plot_list = aquarius::plot_split_dimred(sobj,
reduction = name2D,
split_by = "project_name",
group_by = "seurat_clusters",
split_color = setNames(sample_info$color,
nm = sample_info$project_name),
group_color = aquarius::gg_color_hue(length(levels(sobj$seurat_clusters))))
plot_list[[length(plot_list) + 1]] = clusters_plot +
ggplot2::labs(title = "Cluster ID") &
ggplot2::theme(plot.title = element_text(hjust = 0.5, size = 15))
patchwork::wrap_plots(plot_list, ncol = 3) &
Seurat::NoLegend()
We visualize cell type :
plot_list = lapply((c(paste0("RNA_pca_", ndims, "_umap"),
paste0("RNA_pca_", ndims, "_umap"),
paste0("harmony_", ndims, "_umap"),
paste0("harmony_", ndims, "_umap"))),
FUN = function(one_red) {
Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = one_red,
cols = color_markers) +
Seurat::NoAxes() + ggplot2::ggtitle(one_red) +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
})
patchwork::wrap_plots(plot_list, nrow = 2) +
patchwork::plot_layout(guides = "collect")
We make a representation split by origin to show cell types :
plot_list = aquarius::plot_split_dimred(sobj,
reduction = name2D,
split_by = "project_name",
split_color = setNames(sample_info$color,
nm = sample_info$project_name),
group_by = "cell_type",
group_color = color_markers)
plot_list[[length(plot_list) + 1]] = patchwork::guide_area()
patchwork::wrap_plots(plot_list, ncol = 3) +
patchwork::plot_layout(guides = "collect") &
ggplot2::theme(legend.position = "right")
We visualize cell cycle annotation, and BIRC5 and TOP2A expression levels :
plot_list = list()
# Seurat
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = name2D) +
Seurat::NoAxes() + ggplot2::labs(title = "Seurat annotation") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
# cyclone
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = name2D) +
Seurat::NoAxes() + ggplot2::labs(title = "cyclone annotation") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
# BIRC5
plot_list[[3]] = Seurat::FeaturePlot(sobj, features = "BIRC5",
reduction = name2D) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
# TOP2A
plot_list[[4]] = Seurat::FeaturePlot(sobj, features = "TOP2A",
reduction = name2D) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, ncol = 2)
We save the Seurat object :
saveRDS(sobj, file = paste0(out_dir, "/", save_name, "_sobj.rds"))
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
##
## locale:
## [1] C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ComplexHeatmap_2.14.0 ggplot2_3.3.5 patchwork_1.1.2
## [4] dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] softImpute_1.4 graphlayouts_0.7.0
## [3] pbapply_1.4-2 lattice_0.20-41
## [5] haven_2.3.1 vctrs_0.3.8
## [7] usethis_2.0.1 dynwrap_1.2.1
## [9] blob_1.2.1 survival_3.2-13
## [11] prodlim_2019.11.13 dynutils_1.0.5
## [13] later_1.3.0 DBI_1.1.1
## [15] R.utils_2.11.0 SingleCellExperiment_1.8.0
## [17] rappdirs_0.3.3 uwot_0.1.8
## [19] dqrng_0.2.1 jpeg_0.1-8.1
## [21] zlibbioc_1.32.0 pspline_1.0-18
## [23] pcaMethods_1.78.0 mvtnorm_1.1-1
## [25] htmlwidgets_1.5.4 GlobalOptions_0.1.2
## [27] future_1.22.1 UpSetR_1.4.0
## [29] laeken_0.5.2 leiden_0.3.3
## [31] clustree_0.4.3 parallel_3.6.3
## [33] scater_1.14.6 irlba_2.3.3
## [35] DEoptimR_1.0-9 tidygraph_1.1.2
## [37] Rcpp_1.0.9 readr_2.0.2
## [39] KernSmooth_2.23-17 carrier_0.1.0
## [41] promises_1.1.0 gdata_2.18.0
## [43] DelayedArray_0.12.3 limma_3.42.2
## [45] graph_1.64.0 RcppParallel_5.1.4
## [47] Hmisc_4.4-0 fs_1.5.2
## [49] RSpectra_0.16-0 fastmatch_1.1-0
## [51] ranger_0.12.1 digest_0.6.25
## [53] png_0.1-7 sctransform_0.2.1
## [55] cowplot_1.0.0 DOSE_3.12.0
## [57] here_1.0.1 TInGa_0.0.0.9000
## [59] ggvenn_0.1.9 ggraph_2.0.3
## [61] pkgconfig_2.0.3 GO.db_3.10.0
## [63] DelayedMatrixStats_1.8.0 gower_0.2.1
## [65] ggbeeswarm_0.6.0 iterators_1.0.12
## [67] DropletUtils_1.6.1 reticulate_1.26
## [69] clusterProfiler_3.14.3 SummarizedExperiment_1.16.1
## [71] circlize_0.4.15 beeswarm_0.4.0
## [73] GetoptLong_1.0.5 xfun_0.35
## [75] bslib_0.3.1 zoo_1.8-10
## [77] tidyselect_1.1.0 reshape2_1.4.4
## [79] purrr_0.3.4 ica_1.0-2
## [81] pcaPP_1.9-73 viridisLite_0.3.0
## [83] rtracklayer_1.46.0 rlang_1.0.2
## [85] hexbin_1.28.1 jquerylib_0.1.4
## [87] dyneval_0.9.9 glue_1.4.2
## [89] RColorBrewer_1.1-2 matrixStats_0.56.0
## [91] stringr_1.4.0 lava_1.6.7
## [93] europepmc_0.3 DESeq2_1.26.0
## [95] recipes_0.1.17 labeling_0.3
## [97] harmony_0.1.0 httpuv_1.5.2
## [99] class_7.3-17 BiocNeighbors_1.4.2
## [101] DO.db_2.9 annotate_1.64.0
## [103] jsonlite_1.7.2 XVector_0.26.0
## [105] bit_4.0.4 mime_0.9
## [107] aquarius_0.1.5 Rsamtools_2.2.3
## [109] gridExtra_2.3 gplots_3.0.3
## [111] stringi_1.4.6 processx_3.5.2
## [113] gsl_2.1-6 bitops_1.0-6
## [115] cli_3.0.1 batchelor_1.2.4
## [117] RSQLite_2.2.0 randomForest_4.6-14
## [119] tidyr_1.1.4 data.table_1.14.2
## [121] rstudioapi_0.13 org.Mm.eg.db_3.10.0
## [123] GenomicAlignments_1.22.1 nlme_3.1-147
## [125] qvalue_2.18.0 scran_1.14.6
## [127] locfit_1.5-9.4 scDblFinder_1.1.8
## [129] listenv_0.8.0 ggthemes_4.2.4
## [131] gridGraphics_0.5-0 R.oo_1.24.0
## [133] dbplyr_1.4.4 BiocGenerics_0.32.0
## [135] TTR_0.24.2 readxl_1.3.1
## [137] lifecycle_1.0.1 timeDate_3043.102
## [139] ggpattern_0.3.1 munsell_0.5.0
## [141] cellranger_1.1.0 R.methodsS3_1.8.1
## [143] proxyC_0.1.5 visNetwork_2.0.9
## [145] caTools_1.18.0 codetools_0.2-16
## [147] Biobase_2.46.0 GenomeInfoDb_1.22.1
## [149] vipor_0.4.5 lmtest_0.9-38
## [151] msigdbr_7.5.1 htmlTable_1.13.3
## [153] triebeard_0.3.0 lsei_1.2-0
## [155] xtable_1.8-4 ROCR_1.0-7
## [157] BiocManager_1.30.10 scatterplot3d_0.3-41
## [159] abind_1.4-5 farver_2.0.3
## [161] parallelly_1.28.1 RANN_2.6.1
## [163] askpass_1.1 GenomicRanges_1.38.0
## [165] RcppAnnoy_0.0.16 tibble_3.1.5
## [167] ggdendro_0.1-20 cluster_2.1.0
## [169] future.apply_1.5.0 Seurat_3.1.5
## [171] dendextend_1.15.1 Matrix_1.3-2
## [173] ellipsis_0.3.2 prettyunits_1.1.1
## [175] lubridate_1.7.9 ggridges_0.5.2
## [177] igraph_1.2.5 RcppEigen_0.3.3.7.0
## [179] fgsea_1.12.0 remotes_2.4.2
## [181] scBFA_1.0.0 destiny_3.0.1
## [183] VIM_6.1.1 testthat_3.1.0
## [185] htmltools_0.5.2 BiocFileCache_1.10.2
## [187] yaml_2.2.1 utf8_1.1.4
## [189] plotly_4.9.2.1 XML_3.99-0.3
## [191] ModelMetrics_1.2.2.2 e1071_1.7-3
## [193] foreign_0.8-76 withr_2.5.0
## [195] fitdistrplus_1.0-14 BiocParallel_1.20.1
## [197] xgboost_1.4.1.1 bit64_4.0.5
## [199] foreach_1.5.0 robustbase_0.93-9
## [201] Biostrings_2.54.0 GOSemSim_2.13.1
## [203] rsvd_1.0.3 memoise_2.0.0
## [205] evaluate_0.18 forcats_0.5.0
## [207] rio_0.5.16 geneplotter_1.64.0
## [209] tzdb_0.1.2 caret_6.0-86
## [211] ps_1.6.0 DiagrammeR_1.0.6.1
## [213] curl_4.3 fdrtool_1.2.15
## [215] fansi_0.4.1 highr_0.8
## [217] urltools_1.7.3 xts_0.12.1
## [219] GSEABase_1.48.0 acepack_1.4.1
## [221] edgeR_3.28.1 checkmate_2.0.0
## [223] scds_1.2.0 cachem_1.0.6
## [225] npsurv_0.4-0 babelgene_22.3
## [227] rjson_0.2.20 openxlsx_4.1.5
## [229] ggrepel_0.9.1 clue_0.3-60
## [231] rprojroot_2.0.2 stabledist_0.7-1
## [233] tools_3.6.3 sass_0.4.0
## [235] nichenetr_1.1.1 magrittr_2.0.1
## [237] RCurl_1.98-1.2 proxy_0.4-24
## [239] car_3.0-11 ape_5.3
## [241] ggplotify_0.0.5 xml2_1.3.2
## [243] httr_1.4.2 assertthat_0.2.1
## [245] rmarkdown_2.18 boot_1.3-25
## [247] globals_0.14.0 R6_2.4.1
## [249] Rhdf5lib_1.8.0 nnet_7.3-14
## [251] RcppHNSW_0.2.0 progress_1.2.2
## [253] genefilter_1.68.0 statmod_1.4.34
## [255] gtools_3.8.2 shape_1.4.6
## [257] HDF5Array_1.14.4 BiocSingular_1.2.2
## [259] rhdf5_2.30.1 splines_3.6.3
## [261] AUCell_1.8.0 carData_3.0-4
## [263] colorspace_1.4-1 generics_0.1.0
## [265] stats4_3.6.3 base64enc_0.1-3
## [267] dynfeature_1.0.0 smoother_1.1
## [269] gridtext_0.1.1 pillar_1.6.3
## [271] tweenr_1.0.1 sp_1.4-1
## [273] ggplot.multistats_1.0.0 rvcheck_0.1.8
## [275] GenomeInfoDbData_1.2.2 plyr_1.8.6
## [277] gtable_0.3.0 zip_2.2.0
## [279] knitr_1.41 latticeExtra_0.6-29
## [281] biomaRt_2.42.1 IRanges_2.20.2
## [283] fastmap_1.1.0 ADGofTest_0.3
## [285] copula_1.0-0 doParallel_1.0.15
## [287] AnnotationDbi_1.48.0 vcd_1.4-8
## [289] babelwhale_1.0.1 openssl_1.4.1
## [291] scales_1.1.1 backports_1.2.1
## [293] S4Vectors_0.24.4 ipred_0.9-12
## [295] enrichplot_1.6.1 hms_1.1.1
## [297] ggforce_0.3.1 Rtsne_0.15
## [299] shiny_1.7.1 numDeriv_2016.8-1.1
## [301] polyclip_1.10-0 lazyeval_0.2.2
## [303] Formula_1.2-3 tsne_0.1-3
## [305] crayon_1.3.4 MASS_7.3-54
## [307] pROC_1.16.2 viridis_0.5.1
## [309] dynparam_1.0.0 rpart_4.1-15
## [311] zinbwave_1.8.0 compiler_3.6.3
## [313] ggtext_0.1.0